#Folate concentrations and serum perfluoroalkyl and 
#polyfluoroalkyl substance concentrations in adolescents and 
#adults in the USA (National Health and Nutrition 
# Examination Study 2003–16): an observational study
# 这周新做一篇文章复现 ! 2023年7月23日11:19:13 

# 这项研究基于2013-2016美国国家健康和营养检查调查（NHANES）的2802名青少年和9159名成年参与者数据，
#旨在探索血液中叶酸生物标志物浓度与PFAS浓度之间的关系。结果表明，在青少年和成人中，
#大多数检测的血清PFAS化合物与红细胞或血清中测量的叶酸浓度呈一致的负相关。这表明PFAS可能与叶酸竞争几种与PFAS毒性动力学相关的转运蛋白
#，这些发现可能对减少累积的PFAS身体负担和减轻相关不良健康影响的干预措施具有重要意义


library(tidyverse)
library(gtsummary)
library(ggplot2)
library(haven)
library(rms)
library(skimr)

setwd('F:/Rproject/r-language/paper5')


setwd("${path}")

 output12 <- read.csv(file= 'paper5data.csv')
output12 <- read.csv(file= '${path}paper5data.csv')
# write.csv(output12,file = 'paper5data.csv')
# 进行数据的切换tab1
tab1data <- output12[,c('SEQN', 'RIDAGEYR','RIAGENDR','RIDRETH1','BMXBMI','INDFMPIR','DMDEDUC2')]

# 定义一个函数 批量给数据进行转换 变量必须给全
addParseVa <- function(tab1data){
  tab1data$Sex <- ifelse(tab1data$RIAGENDR==1,'male','female')
  tab1data$race <- recode_factor(tab1data$RIDRETH1, 
                                 `1` = 'Hispanic',
                                 `2` = 'Hispanic',
                                 `3` = 'Non-Hispanic White',
                                 `4` = 'Non-Hispanic Black',
                                 `5` = 'Other,including multiracial')
  tab1data$edu <- recode_factor(tab1data$DMDEDUC2, 
                                `1` = 'Less than middle school',
                                `2` = 'middle school',
                                `3` = 'High school graduate',
                                `4` = 'College degree',
                                `5` = 'College degree or above')
  tab1data$FPL <- ifelse(tab1data$INDFMPIR<1.0,'< 1',
                         ifelse((tab1data$INDFMPIR>=1.0&tab1data$INDFMPIR<2.0) , ">=1 to <2" ,
                                ifelse((tab1data$INDFMPIR>=2.0) , ">=2" ,'')))
  tab1data$group <- ifelse(tab1data$RIDAGEYR<=19,'Adolescents','Adults')
  return(tab1data)
}
tab1data<- addParseVa(tab1data =tab1data )

tab1<-tbl_summary(tab1data,by = group,missing = "no",
                  
                  include = c('RIDAGEYR','BMXBMI', 'Sex','race','edu','FPL'),
            label = list(RIDAGEYR ~ "Age,years",BMXBMI ~"BMI,KG/M2",race~"Race and ethnicity",edu~"Education Level",FPL~"Income-to-poverty ratio"),
            statistic = list(
              # 分别对应数值型和分类变量 后是需要显示表达式
              all_continuous() ~ "{mean}({sd})"
            ),
            digits = list(all_continuous() ~ 2
            )
            )
tab1%>%as_flex_table()%>%flextable::save_as_html(path = 'paper5tab1.html')


logfunction<-function(tab2dataParam ){
  
  #log 转换
  tab2dataParam$LBXPFOA<- log(tab2dataParam$LBXPFOA)
  tab2dataParam$LBXPFOS<- log(tab2dataParam$LBXPFOS)
  tab2dataParam$LBXPFHS<- log(tab2dataParam$LBXPFHS)
  tab2dataParam$LBXPFNA<- log(tab2dataParam$LBXPFNA)
  tab2dataParam$serumfo<- log(tab2dataParam$serumfo)
  tab2dataParam$rbcfo<- log(tab2dataParam$rbcfo)
  
  return (tab2dataParam)
  
}

# 所有变量进行log转换防止特殊值引起的差异红细胞和血清中的叶酸浓度以及血清中的PFAS浓
#度进行了自然对数转换，以尽量减少异常值的影
#响，并改善对相关结果的解释
# 进行数据的切换tab1
#全氟辛酸(PFOA)PFAS LBXPFOA 
#全氟辛烷磺酸(PFOS) LBXPFOS
#全氟己烷磺酸(PFHxS ) LBXPFHS
#全氟壬酸(PFNA ) LBXPFNA
# 血清叶酸 serumfo  红细胞叶酸 rbcfo
# tab2data <- output12[,c('SEQN', 'RIDAGEYR','RIAGENDR','RIDRETH1','BMXBMI','INDFMPIR','DMDEDUC2','LBXPFOA','LBXPFOS','LBXPFHS','LBXPFNA','serumfo','rbcfo','totaldrFA')]
# tab2data <-addParseVa(tab1data = tab2data)
# table(tab2data$group)
# tab2data <- tab2data[which(tab2data$group=='Adolescents'),]
# 
# 
# colnames(output12)
# # 2.7倍怎么理解?
# m1 <- glm(LBXPFOA~rbcfo,data = tab2data)
# m2 <- glm(LBXPFOA~serumfo,data = tab2data)
# tbl_regression(m1)
# tbl_regression(m2)

# 先这样吧 2023年7月27日09:50:15

#table3 
# tab3data <- output12[,c('SEQN', 'RIDAGEYR','RIAGENDR','RIDRETH1','BMXBMI','INDFMPIR','DMDEDUC2','LBXPFOA','LBXPFOS','LBXPFHS','LBXPFNA','serumfo','rbcfo','totaldrFA')]
# 
# 
# tab3data<-logfunction(tab3data)
# 
# 
# tab3data <- tab3data[-which(tab3data$group!='Adolescents'),]

# 限制性立方样条
# 儿童
tabrcsdata <- output12[,c('SEQN', 'RIDAGEYR','RIAGENDR','RIDRETH1','BMXBMI','INDFMPIR','DMDEDUC2','LBXPFOA','LBXPFOS','LBXPFHS','LBXPFNA','serumfo','rbcfo','totaldrFA')]
tabrcsdata <-addParseVa(tab1data = tabrcsdata)
tabrcsdata<-logfunction(tabrcsdata)
write.csv(tabrcsdata,file = 'datafinal.csv')
tabrcsdataAdolescents <- tabrcsdata[which(tabrcsdata$group=='Adolescents'),]
tabrcsdataAdolescents$DMDEDUC2 <- 'middle'
ddist<-datadist(tabrcsdataAdolescents)
options(datadist="ddist")

#全氟辛酸(PFOA)PFAS LBXPFOA 
#全氟辛烷磺酸(PFOS) LBXPFOS
#全氟己烷磺酸(PFHxS ) LBXPFHS
#全氟壬酸(PFNA ) LBXPFNA
# PFOA RBC
fit1 <-lrm(rbcfo ~rcs(LBXPFOA,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataAdolescents,x=FALSE)
pred_OR1<-Predict(fit1,LBXPFOA,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFOARBC.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR1)+labs(x = 'log-transformed folate concentration in RBC',y = 'log-transformed PFOA concentration ')+
  theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+
  xlim(c(0, 7)) +ylim(c(-1, 5))


# PFOS RBC
fit2 <-lrm(rbcfo ~rcs(LBXPFOS,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataAdolescents,x=FALSE)
pred_OR2<-Predict(fit2,LBXPFOS,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFOSRBC.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR2)+labs(x = 'log-transformed folate concentration in RBC',y = 'log-transformed PFOS concentration ')+
  theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+
  xlim(c(0, 7)) + ylim(c(-1, 5))

# PFOA serumfo
fit3 <-lrm(serumfo ~rcs(LBXPFOA,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataAdolescents,x=FALSE)
pred_OR3<-Predict(fit3,LBXPFOA,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFOAserumfo.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR3)+labs(x = 'log-transformed serum folate concentration ',y = 'log-transformed PFOA concentration ')+
  theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+
  xlim(c(0, 7)) + ylim(c(-1, 5))

# PFOS serumfo
fit4 <-lrm(serumfo ~rcs(LBXPFOS,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataAdolescents,x=FALSE)
pred_OR4<-Predict(fit4,LBXPFOS,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFOSserumfo.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR4)+labs(x = 'log-transformed serum folate concentration',y = 'log-transformed PFOS concentration ')+
  theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+
  xlim(c(0, 7)) + ylim(c(-1, 5))

# PFNA RBC
fit5 <-lrm(rbcfo ~rcs(LBXPFNA,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataAdolescents,x=FALSE)
pred_OR5<-Predict(fit5,LBXPFNA,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFNARBC.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR5)+labs(x = 'log-transformed folate concentration in RBC',y = 'log-transformed PFNA concentration ')+theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+xlim(c(0, 7)) + ylim(c(-1, 5))

# PFHXS RBC
fit6 <-lrm(rbcfo ~rcs(LBXPFHS,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataAdolescents,x=FALSE)
pred_OR6<-Predict(fit6,LBXPFHS,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFHXSRBC.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR6)+labs(x = 'log-transformed folate concentration in RBC',y = 'log-transformed PFHXS concentration ')+theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+xlim(c(0, 7)) + ylim(c(-1, 5))

# PFNA serumfo
fit7 <-lrm(serumfo ~rcs(LBXPFNA,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataAdolescents,x=FALSE)
pred_OR7<-Predict(fit7,LBXPFNA,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFNAserumfo.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR7)+labs(x = 'log-transformed serum folate concentration',y = 'log-transformed PFNA concentration ')+theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+xlim(c(0, 7)) + ylim(c(-1, 5))

# PFHXS serumfo
fit8 <-lrm(serumfo ~rcs(LBXPFHS,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataAdolescents,x=FALSE)
pred_OR8<-Predict(fit8,LBXPFHS,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFHXSserumfo.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR8)+labs(x = 'log-transformed serum folate concentration',y = 'log-transformed PFHXS concentration ')+theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+xlim(c(0, 7)) + ylim(c(-1, 5))

# 儿童结束

# adult 开始
tabrcsdataadult <- output12[,c('SEQN', 'RIDAGEYR','RIAGENDR','RIDRETH1','BMXBMI','INDFMPIR','DMDEDUC2','LBXPFOA','LBXPFOS','LBXPFHS','LBXPFNA','serumfo','rbcfo','totaldrFA')]
tabrcsdataadult <-addParseVa(tab1data = tabrcsdataadult)
tabrcsdataadult<-logfunction(tabrcsdataadult)
tabrcsdataadult <- tabrcsdataadult[-which(tabrcsdataadult$group=='Adolescents'),]
ddist<-datadist(tabrcsdataadult)
options(datadist="ddist")

#全氟辛酸(PFOA)PFAS LBXPFOA 
#全氟辛烷磺酸(PFOS) LBXPFOS
#全氟己烷磺酸(PFHxS ) LBXPFHS
#全氟壬酸(PFNA ) LBXPFNA
# PFOA RBC
fit11 <-lrm(rbcfo ~rcs(LBXPFOA,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataadult,x=FALSE)
pred_OR11<-Predict(fit11,LBXPFOA,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFOARBC11.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR11)+labs(x = 'log-transformed folate concentration in RBC',y = 'log-transformed PFOA concentration ')+
  theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+
  xlim(c(0, 7)) +ylim(c(-1, 5))

# PFOS RBC
fit22 <-lrm(rbcfo ~rcs(LBXPFOS,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataadult,x=FALSE)
pred_OR22<-Predict(fit22,LBXPFOS,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFOSRBC22.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR22)+labs(x = 'log-transformed folate concentration in RBC',y = 'log-transformed PFOS concentration ')+
  theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+
  xlim(c(0, 7)) + ylim(c(-1, 5))

# PFOA serumfo
fit33 <-lrm(serumfo ~rcs(LBXPFOA,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataadult,x=FALSE)
pred_OR33<-Predict(fit33,LBXPFOA,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFOAserumfo33.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR33)+labs(x = 'log-transformed serum folate concentration ',y = 'log-transformed PFOA concentration ')+
  theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+
  xlim(c(0, 7)) + ylim(c(-1, 5))

# PFOS serumfo
fit44 <-lrm(serumfo ~rcs(LBXPFOS,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataadult,x=FALSE)
pred_OR44<-Predict(fit44,LBXPFOS,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFOSserumfo44.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR44)+labs(x = 'log-transformed serum folate concentration',y = 'log-transformed PFOS concentration ')+
  theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+
  xlim(c(0, 7)) + ylim(c(-1, 5))

# PFNA RBC
fit55 <-lrm(rbcfo ~rcs(LBXPFNA,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataadult,x=FALSE)
pred_OR55<-Predict(fit55,LBXPFNA,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFNARBC55.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR55)+labs(x = 'log-transformed folate concentration in RBC',y = 'log-transformed PFNA concentration ')+theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+xlim(c(0, 7)) + ylim(c(-1, 5))

# PFHXS RBC
fit66 <-lrm(rbcfo ~rcs(LBXPFHS,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataadult,x=FALSE)
pred_OR66<-Predict(fit66,LBXPFHS,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFHXSRBC66.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR66)+labs(x = 'log-transformed folate concentration in RBC',y = 'log-transformed PFHXS concentration ')+theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+xlim(c(0, 7)) + ylim(c(-1, 5))

# PFNA serumfo
fit77 <-lrm(serumfo ~rcs(LBXPFNA,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataadult,x=FALSE)
pred_OR77<-Predict(fit77,LBXPFNA,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFNAserumfo77.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR77)+labs(x = 'log-transformed serum folate concentration',y = 'log-transformed PFNA concentration ')+theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+xlim(c(0, 7)) + ylim(c(-1, 5))

# PFHXS serumfo
fit88 <-lrm(serumfo ~rcs(LBXPFHS,nk=5)+RIDAGEYR + Sex + race + BMXBMI + INDFMPIR ,data=tabrcsdataadult,x=FALSE)
pred_OR88<-Predict(fit88,LBXPFHS,ref.zero=TRUE,fun=exp)
Cairo::CairoTIFF(file="PFHXSserumfo88.tiff", width=8, height=8,units="in",dpi=150)
ggplot(pred_OR88)+labs(x = 'log-transformed serum folate concentration',y = 'log-transformed PFHXS concentration ')+theme(axis.title.x = element_text(margin = margin(t = 10), size = 15),axis.title.y = element_text(margin = margin(r = 10), size = 15))+xlim(c(0, 7)) + ylim(c(-1, 5))

# 成人结束

